package contentSpider;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import org.apache.http.client.params.CookiePolicy;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Small manual smoke test for HtmlUnit: fetches the Baidu home page, then a
 * Baidu redirect link, and prints the resulting page body (as XML) and its
 * base URI to standard output.
 */
public class HTMLUnitTest {

	/** Landing page requested first, before the redirect link is followed. */
	private static final String SITE_URL = "http://www.baidu.com/";

	/** Baidu redirect link whose target page is fetched and dumped. */
	private static final String REDIRECT_LINK_URL = "http://www.baidu.com/link?url=92838f49e33f2e1b5678f12defcb878a92f3dcd32e41b1f924869ac1eb9457207b7ea0b22d81b59971ef468266a4de390ab94b4ea65fc4f6b654f3ea09085c6b9a7873c9c9ecafc8e0356fff98f89901304a124029d19f32ac7c5847a1b2c965d64d4979185ad396f115d0bfca5063d526aa2ba5d1f094380c1948cc0dfa5dacf73fb2567edf4d2ea913a3a13e9996e6a08fa700e05e76bce4636b506030fb3ffd088b3394e4041306cf1ca1dbfef1cb34a8035091a2134897ac21ad1ef863ea8dcf2e4b66c30cef47fe8a5f58911c26745511b5eef6f2d39102c223721984aebb8d64e84e858608701a92b946f7738b18d5dbd47f71f89f3c0d5d87cc52be838e790afda0c093a4e02eae5ef639dc6736d6a91abdeef8e654f926ee3a4a1770beb369d7c553d899ea4c08c98af352598479d9cca5babfa6cc0adfb615b97b7a2965225507677449f22418745102088d011b850279fddc83e9399d378d66ebac5573358cc3c0659257bfcf547b398ac9eff77129fc130c6603c478b741246aff77ab3dab69d7a354ed56b0486df8aa43d0de2ef5d04e0126374e0258";

	/**
	 * Fetches {@link #SITE_URL} and then {@link #REDIRECT_LINK_URL} with a
	 * JavaScript-disabled {@link WebClient} and prints the second page's body
	 * markup and base URI. Fetch failures are reported on standard error and do
	 * not propagate; the client is always released before returning.
	 *
	 * @param args command-line arguments (unused)
	 * @throws UnsupportedEncodingException declared for compatibility with
	 *         existing callers; nothing in this method throws it directly
	 */
	public static void main(String[] args) throws UnsupportedEncodingException {
		final WebClient webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_8);
		// NOTE(review): this is a system-property based cookie-spec switch and is
		// set after the client is constructed (order kept from the original) —
		// confirm the HTTP client in use still reads it, and when.
		System.setProperty("apache.commons.httpclient.cookiespec", CookiePolicy.BROWSER_COMPATIBILITY);
		webClient.setJavaScriptEnabled(false);
		try {
			// The landing page itself is not inspected; presumably it is requested
			// first so the same client has visited the site before following the
			// redirect link — TODO confirm it is actually required.
			webClient.getPage(SITE_URL);

			HtmlPage spiderPage = webClient.getPage(REDIRECT_LINK_URL);
			if (spiderPage.getBody() != null) {
				System.out.println(spiderPage.getBody().asXml());
			} else {
				// A document without a <body> would otherwise fail with a
				// NullPointerException when serialising it.
				System.err.println("Fetched page has no <body> element; nothing to print.");
			}
			System.out.println(spiderPage.getBaseURI());
		} catch (FailingHttpStatusCodeException e) {
			System.err.println("Server answered with a failing HTTP status: " + e.getMessage());
			e.printStackTrace();
		} catch (MalformedURLException e) {
			System.err.println("Invalid URL: " + e.getMessage());
			e.printStackTrace();
		} catch (IOException e) {
			System.err.println("I/O error while fetching page: " + e.getMessage());
			e.printStackTrace();
		} finally {
			// Release the client's windows (and whatever they hold) on every path,
			// including failures; the original never released the client.
			webClient.closeAllWindows();
		}
	}
}
